
rm(list=ls())
library(foreign)
library(polycor)
library(MASS)
library(mirt)

set.seed(21042020)

fitMixIRT <- TRUE # fit Fowler et al model to original data sets
fitRegenMixIRT <- TRUE # fit Fowler et al model to regenerated data sets 

source("../Moderates Replication Archive/IRT_mixing/mixture_irt.R") # source model code from replication package
source("roll_call_regeneration_code.R") # source code to regenerate roll call data from multivariate normal

# To install aroma.light package, use code below
# if (!require("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
# BiocManager::install("aroma.light")

###
### load all survey data (code from Fowler et al replication archive)
###

data <- read.dta("../bigsurveys_recoded4.dta")

policy_ind <- 3:329
years <- unique(data$source)

table(years)

###
### Remove modules from datasets (code from Fowler et al replication archive)
###

for(year in years){
    tmp <- data[,policy_ind]
    tmp <- tmp[data$source==year,]
    allna <- is.na(colMeans(tmp,na.rm=T))
    tmp <- tmp[,! allna]
    
    # we need fewer responses than this
    # to determine that an item was asked
    # on a module
    threshold <- 5000
    
    module_items <- c()
    for(i in 1:dim(tmp)[2]){
      vec <- ifelse(tmp[,i]==0,1, tmp[,i])
      if(sum(vec,na.rm=T) < threshold){
        module_items <- c(module_items,i)
      }
    }
    if(length(module_items) > 1){
       module_people <- which(! is.na(rowMeans(tmp[,module_items],na.rm=T)))
    } else {module_people <- c()}
    
    
    print(year)
    print(c("Total people, items:",dim(tmp))) 
    print(c("Module people:",length(module_people))) 
    print(c("Module items:",length(module_items)))
    
    # now that module items have been identified,
    # NA those items for those modules
    if(length(module_items) > 0){
       ind <- which(names(data) %in% names(tmp)[module_items])
       data[data$source==year,ind] <- NA
    }
}


###
### run estimators and save outputs to disk
###

for(i in 1:length(years)){

   ###
   # extract roll call data (code from Fowler et al replication package)
   ###
    
   year <- years[i]
   print(year)
   mat <- apply(data[data$source==year,policy_ind],2,as.numeric)
   exclude <- ! colnames(mat) %in% c("cces2012_immigration5","cces2012_immigration6")
   notna <- which(! is.na(colMeans(mat,na.rm=T)) & exclude)
   mat <- mat[, notna]
   
   # save pid7 to identify polarity later

   data$pid7[data$pid7=="__NA__"] <- NA
   data$pid7 <- as.numeric(data$pid7)
   pid7 <- data$pid7[data$source == year]
   
   ###   
   # Fit "Moderates" mixture model to original data
   ###
   
   if (fitMixIRT){
   
   res <- em_mix_irt(mat,iter=30)
   
   save(res,mat,pid7,file=paste0("outputs/MixIRT_",year,".Rdata"))
   
   }


   ###   
   # Fit "Moderates" mixture model to 5x simulated data sets
   ###
   
   for (r in 1:5){
        mat_alt <- mvn_rc_generator(mat)
        res_alt <- em_mix_irt(mat_alt,iter=30)
        save(res_alt,mat_alt,pid7,file=paste0("outputs/MixIRT_",year,"_R",r,".Rdata"))
   }
   
}

